__author__ = 'tarek'

# The purpose of this script is to capture tweets about publicly held firms and store them in the database.

import pandas as pd
import tweepy
from tweepy import StreamListener
from twitterSentiment import models
from datetime import datetime
from pytz import timezone
import json, time, sys
from numpy import random
import re
from django.contrib.auth.models import User


def timeupdate(twitterdate):
    """Convert a Twitter ``created_at`` string into a timezone-aware datetime.

    The input looks like ``Tue Jul 02 14:33:59 +0000 2013``; the literal
    ``+0000`` in the format means the wall-clock value is expressed in UTC.
    The parsed value is therefore tagged as UTC first and then converted to
    US/Central, so the result denotes the instant the tweet was created
    (``2013-07-02 09:33:59-05:00`` for the example above).

    Raises ValueError if the string does not match the expected format.
    """
    naive_utc = datetime.strptime(twitterdate, '%a %b %d %H:%M:%S +0000 %Y')
    # Localizing the naive value directly as US/Central would relabel a UTC
    # wall-clock time as Central time and shift the instant by 5-6 hours.
    aware_utc = timezone('UTC').localize(naive_utc)
    return aware_utc.astimezone(timezone('US/Central'))

class TwitterListener(StreamListener):
    """Stream listener that stores English-language tweets in the database.

    Progress is tracked in the module-level counters ``tweetsmax``,
    ``tweetscount``, ``alltweetscount`` and ``iterationcount``.
    """

    #ideas from http://digitalfoo.net/posts/using-python-and-tweepy-to-scrape-streaming-tweets-into-mongodb/
    #and  http://nbviewer.ipython.org/github/alexhanna/hse-twitter/blob/master/docs/Collecting%20Twitter%20data%20from%20the%20API%20with%20Python.ipynb

    def __init__(self, api = None, fprefix = 'streamer'):
        """Remember the API handle and reset the per-listener counter.

        api     -- tweepy API object; a default ``tweepy.API()`` is built
                   when omitted.
        fprefix -- accepted for backward compatibility; not used.
        """
        # A bare ``API`` name is never imported in this module, so the
        # fallback has to go through the ``tweepy`` namespace.
        self.api = api or tweepy.API()
        self.counter = 0

    def on_data(self, data):
        """Handle one raw stream message (a JSON string).

        Returns False once ``tweetscount`` has reached ``tweetsmax`` (after
        resetting ``tweetscount`` and bumping ``iterationcount``); tweepy
        presumably treats False as a request to close the stream -- confirm
        against the installed tweepy version.  Otherwise the message is
        counted, and if it decodes to a dict whose user language is 'en' it
        is printed and handed to TwitterDatabase.  Returns None in that case.
        """
        global tweetsmax
        global tweetscount
        global alltweetscount
        global iterationcount
        print ("tweets count:",tweetscount, "/",tweetsmax,". Iteration: ",iterationcount," Total tweets: ", alltweetscount)
        if tweetsmax == tweetscount:
            # Budget for this session is used up: start a new iteration.
            tweetscount = 0
            iterationcount = iterationcount + 1
            return False
        tweetscount = tweetscount + 1
        alltweetscount = alltweetscount + 1
        try:
            tweet = json.loads(data) #convert twitter stream in json into Python dictionary
            if isinstance(tweet, dict) and tweet['user']['lang'] == 'en':
                print ("tweet: ", tweet['text'])
                TwitterDatabase(tweet)
        except Exception:
            # Best effort: a malformed or non-tweet message must not stop the
            # stream.  ``Exception`` (not a bare except) lets
            # KeyboardInterrupt / SystemExit through so the process can stop.
            print ("Error in Twitter listener. Error message:", sys.exc_info())
        return

    def on_limit(self, track):
        """Print a notice when a limit message is reported."""
        print(">> limit")
        return

    def on_error(self, status_code):
        """Print the status code of a stream error."""
        print(">>> error: ", str(status_code) + "\n")
        return

    def on_timeout(self):
        """Print a notice and pause for 60 seconds after a timeout."""
        print(">>> timeout Sleeping for 60 seconds...\n")
        time.sleep(60)
        return

  
def CompanyNamesbyStocks(tweet):
    """Return the companies behind the stock symbols mentioned in a tweet.

    Every ``$``-prefixed token in *tweet* is looked up in the ``Name`` column
    of the module-level ``sp500_companies`` table; a token without an entry
    falls back to the token itself.  The result is a comma-separated string
    of ``name(token)`` items, an empty string when the text has no such
    tokens, or ``"na"`` when the lookup fails (for instance the table has not
    been loaded or *tweet* is not text).
    """
    global sp500_companies
    try:
        # Raw string: "\$" and "\w" are invalid escape sequences in a normal
        # string literal and trigger warnings on recent Python versions.
        symbols = re.findall(r"\$\w+", tweet) #captures the stock symbols
        # NOTE(review): each match keeps its leading "$"; presumably the
        # Symbol index of the table is stored in the same form -- verify
        # against the CSV, otherwise every lookup falls back to the token.
        return ",".join(
            sp500_companies.Name.get(symbol, symbol) + "(" + symbol + ")"
            for symbol in symbols
        )
    except Exception:
        # Best effort: keyword extraction must never abort the insert.
        return "na"

def TwitterDatabase(tweet):
    """Insert one tweet (a dict decoded from Twitter's JSON) into the database.

    Reads ``tweet['user']['id']``, ``tweet['user']['screen_name']``,
    ``tweet['text']``, ``tweet['id']`` and ``tweet['created_at']``, builds a
    ``models.TwitterText`` row attributed to the module-level ``systemid``
    user and saves it.  Failures are printed and swallowed so that one bad
    tweet does not interrupt the caller.  Always returns None.
    """
    global systemid
    try:
        aTweet = models.TwitterText(
            twitter_user_id=tweet['user']['id'],
            twitter_user_name=tweet['user']['screen_name'],
            twitter_text=tweet['text'],
            twitter_text_id=tweet['id'],
            twitter_text_timestamp=timeupdate(tweet['created_at']),
            twitter_text_keyword=CompanyNamesbyStocks(tweet['text']),
            training_user_id=systemid,
        )
        aTweet.save()
    except Exception:
        # ``Exception`` rather than a bare except: KeyboardInterrupt and
        # SystemExit must still be able to stop the process.
        print ("Error in Django insert tweet. error message:", sys.exc_info())
    return
        
def PickRandomCompanies(allstocks, stocks_count):
    """Return a comma-separated random sample of stock symbols.

    Since Twitter does not allow an exhaustive keyword search, only a random
    subset of *allstocks* is tracked at a time.

    allstocks    -- sequence of symbol strings (surrounding whitespace is
                    stripped from each picked symbol).
    stocks_count -- number of symbols wanted; clamped to ``len(allstocks)``.

    Symbols are drawn without replacement so the keyword list contains no
    duplicates.  Returns an empty string when nothing can be picked.
    """
    sample_size = min(stocks_count, len(allstocks))
    if sample_size <= 0:
        return ""
    # numpy's choice() samples WITH replacement by default, which would waste
    # keyword slots on repeated symbols.
    picked = random.choice(allstocks, sample_size, replace=False)
    return ",".join(symbol.strip() for symbol in picked)

def TwitterStreaming(stocks):
    """Open a filtered Twitter stream for the given keywords.

    stocks -- comma-separated keyword string (as produced by
              PickRandomCompanies); it is also published in the module-level
              ``keywords`` list.

    Authenticates with OAuth, attaches a TwitterListener and calls
    ``stream.filter``.  Any streaming error is printed and swallowed.
    Always returns True, even when streaming failed.
    """
    import os  # local import: only the credential lookup needs it

    ## twitter authentication keys
    # SECURITY: these credentials are committed to source control and should
    # be rotated.  They can be overridden through the environment; the
    # literals remain only as a backward-compatible fallback.
    consumer_key        = os.environ.get("TWITTER_CONSUMER_KEY", "yoWOau00G19Q81WKeVZ6g60zU")
    consumer_secret     = os.environ.get("TWITTER_CONSUMER_SECRET", "A0rJ4XlMndHv2xTeQlA2t7N9thBr3FDRu6vkrCy5ab7KAiKmNB")
    access_token        = os.environ.get("TWITTER_ACCESS_TOKEN", "16859687-bt1jbTlHUXO39n114gWEpg24VlKZQVbaF4AgXs4ha")
    access_token_secret = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "kLMge9f3GypNwHv6N9uMCuUdLS7kr5gfR5lzTXEmwMyfi")
    global keywords
    keywords = [stocks]
    try:
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        api = tweepy.API(auth)
        listener = TwitterListener(api, "test")
        print ("Begin Twitter streaming for ", stocks)
        stream = tweepy.Stream(auth, listener)
        stream.filter(track=keywords)
    except Exception:
        # ``Exception`` rather than a bare except so that Ctrl-C / SystemExit
        # can still end the surrounding endless loop.
        print ("Error in Twitter streaming",sys.exc_info())
    return True

# Counters shared with TwitterListener.on_data through ``global`` statements.
# on_data returns False once tweetscount reaches tweetsmax.
tweetsmax = 500
# Current-session count, running total and iteration number all start at 1.
tweetscount = alltweetscount = iterationcount = 1

def run():
    """Entry point: load the lookup data, then stream tweets indefinitely.

    Looks up the id of the Django user named "system", loads the company
    table from ``data/allpubliccomp.csv`` (indexed by its ``Symbol`` column)
    and then repeatedly streams tweets for a fresh random sample of 200
    symbols.  Any error is printed and ends the function.
    """
    # TwitterDatabase and CompanyNamesbyStocks read these two names as module
    # globals; without this declaration the assignments below would only
    # create locals and every lookup/insert would fail with a NameError.
    global systemid, sp500_companies
    try:
        systemid = User.objects.get(username="system").id
        # read_csv replaces DataFrame.from_csv, which was removed in pandas 1.0.
        sp500_companies = pd.read_csv("data/allpubliccomp.csv", index_col='Symbol') ##used a script from the internet that scrapes  spindices
        stocks = sp500_companies.index
        while True:
            TwitterStreaming(PickRandomCompanies(stocks,200))
    except Exception:
        # sys.exc_info() is a tuple; concatenating it to a str would itself
        # raise TypeError, so pass it as a separate print argument.
        print ("Error occured: ", sys.exc_info())
